Data import, cleaning, and exploration

# Load the saved tweet collection.
cf <- readRDS(file = "data/campfire-tweets-2020-04-17.Rds")

# Keep only tweets whose screen name matches one of the listed account names.
# str_detect() performs an unanchored, case-sensitive regex match, so a screen
# name that merely contains one of these strings is kept as well.
Sources <- filter(
  cf,
  str_detect(
    screen_name,
    "CALFIRE_ButteCo|Cal_Fire|ButteSheriff|ChicoPolice|ChicoFD|CountyOfButte|Paradise_CA"
  )
)

# A negative n makes top_n() return the rows with the smallest values of the
# ordering column, i.e. the 29 earliest tweets (ties at the cutoff are kept).
no_outliers <- Sources %>%
  top_n(-29, created_at_pst)

# Earliest tweet time per account among the retained tweets.
summarize(
  group_by(no_outliers, screen_name),
  min(created_at_pst)
)
## # A tibble: 3 x 2
##   screen_name     `min(created_at_pst)`
##   <chr>           <dttm>               
## 1 ButteSheriff    2018-11-08 08:03:55  
## 2 CALFIRE_ButteCo 2018-11-08 06:51:47  
## 3 ChicoFD         2018-11-08 07:46:17
# Convert the account name to a factor before plotting.
no_outliers$screen_name <- as.factor(no_outliers$screen_name)

# Tweets per hour, stacked by account. The plot only uses tweet_hour and
# screen_name, so count on exactly those two columns: this yields one bar
# segment per account and hour instead of one per minute, and count() returns
# an ungrouped result. Bar heights are unchanged.
no_outliers %>%
  count(tweet_hour, screen_name, name = "tweet_count") %>%
  ggplot(aes(x = tweet_hour, y = tweet_count, fill = screen_name)) +
  geom_col()

# Time span of every matched tweet in Sources (not only the rows kept in
# no_outliers).
with(Sources, range(created_at_pst))
## [1] "2018-11-08 06:51:47 PST" "2018-12-19 13:46:14 PST"
# Engagement histograms, one per metric, filled by account. Each panel keeps
# only the tweets whose count for that metric is greater than 1.
plot.fav <- ggplot(
  filter(no_outliers, favorite_count > 1),
  aes(x = favorite_count, fill = screen_name)
) +
  geom_histogram()

plot.rt <- ggplot(
  filter(no_outliers, retweet_count > 1),
  aes(x = retweet_count, fill = screen_name)
) +
  geom_histogram()

plot.quo <- ggplot(
  filter(no_outliers, quote_count > 1),
  aes(x = quote_count, fill = screen_name)
) +
  geom_histogram()

plot.rply <- ggplot(
  filter(no_outliers, reply_count > 1),
  aes(x = reply_count, fill = screen_name)
) +
  geom_histogram()

# Lay the four panels out on a grid with two rows.
gridExtra::grid.arrange(plot.fav, plot.rt, plot.quo, plot.rply, nrow = 2)

# Account display order and the colour palette used by the timeline's colour
# scale.
name_levels <- c(
  "CALFIRE_ButteCo",
  "ButteSheriff",
  "ChicoFD"
)
status_colors <- c(
  "#0070C0",
  "#00B050",
  "#FFC000"
)

# Re-level screen_name as an ordered factor following name_levels.
no_outliers$screen_name <- ordered(no_outliers$screen_name, levels = name_levels)

# Label heights and up/down directions that are cycled over the distinct tweet
# timestamps.
positions <- c(-2, -0.5, -1.0, 2, -1.5, 1.5, 1, .5, 2.5, -2.5)
directions <- c(1, -1)

# One row per distinct timestamp, with its recycled position and direction.
line_pos <- local({
  tweet_times <- unique(no_outliers$created_at_pst)
  n_times <- length(tweet_times)
  data.frame(
    created_at_pst = tweet_times,
    position = rep_len(positions, n_times),
    direction = rep_len(directions, n_times)
  )
})


# Attach position/direction to every tweet by timestamp (full outer join),
# then sort chronologically, breaking ties by account.
no_outliers <- merge(no_outliers, line_pos, by = "created_at_pst", all = TRUE)
no_outliers <- no_outliers[
  order(no_outliers$created_at_pst, no_outliers$screen_name),
]



# Hourly tick marks for the axis, padded by hour_buffer hours on each side of
# the first and last tweet.
hour_buffer <- 2
date_range <- seq(
  from = min(no_outliers$created_at_pst) - hours(hour_buffer),
  to = max(no_outliers$created_at_pst) + hours(hour_buffer),
  by = "hour"
)

# Tick labels as hour:minute, stored next to the tick times.
date_format <- format(date_range, "%H:%M")
date_df <- data.frame(date_range = date_range, date_format = date_format)


# Nudge each label slightly past the end of its stem, in the stem's direction.
text_offset <- 0.05
no_outliers[["text_position"]] <-
  no_outliers[["position"]] + text_offset * no_outliers[["direction"]]

# Label text: the first 20 characters of the tweet.
no_outliers[["text_output"]] <- substr(no_outliers[["text"]], 1, 20)
# Build the timeline in a single chain. Layers are added in drawing order:
# the axis line, a stem per tweet, a point per tweet, the hour labels, and
# finally the truncated tweet text at the end of each stem.
timeline_plot <- ggplot(
  no_outliers,
  aes(x = created_at_pst, y = 0, col = screen_name, label = text_output)
) +
  labs(col = "Tweets") +
  scale_color_manual(
    values = status_colors,
    labels = name_levels,
    drop = FALSE
  ) +
  theme_classic() +
  # Horizontal black line for the timeline itself
  geom_hline(yintercept = 0, color = "black", size = 0.3) +
  # Vertical stem from each tweet's label height down/up to the axis
  geom_segment(
    aes(y = position, yend = 0, xend = created_at_pst),
    color = "black",
    size = 0.2
  ) +
  # One point per tweet on the axis, coloured by account
  geom_point(aes(y = 0), size = 3) +
  # Hide both axes entirely and put the legend underneath
  theme(
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "bottom"
  ) +
  # Hour labels just below the axis
  geom_text(
    data = date_df,
    aes(x = date_range, y = -0.1, label = date_format),
    size = 2.5,
    vjust = 0.5,
    color = "black",
    angle = 45
  ) +
  # Truncated tweet text at each label position
  geom_text(aes(y = text_position, label = text_output), size = 2.5)

print(timeline_plot)

Second Plot Type

library(vistime)
# Same tweets drawn with vistime, one row per account. start and end point at
# the same column, so every tweet has zero duration.
# NOTE(review): newer vistime releases may expect col.event / col.group /
# col.start / col.end instead of these argument names — confirm against the
# installed version.
vistime(
  data = no_outliers,
  start = "created_at_pst",
  end = "created_at_pst",
  events = "text_output",
  groups = "screen_name"
)

And another

library(timelineS)
#timelineS(no_outliers, main = "Life of Michael Jackson")
# Label every tweet as coming from a verified news account or from the public.
#
# An account counts as "news" when its screen name or profile description
# contains "news"/"News" and it is verified.

# One row per account (first occurrence of each screen_name), restricted to
# verified news accounts and ordered by follower count.
news_orgs <- cf %>%
  users_data() %>%
  distinct(screen_name, .keep_all = TRUE) %>%
  filter(
    str_detect(screen_name, "news|News") |
      str_detect(description, "news|News")
  ) %>%
  filter(verified == "TRUE") %>%
  arrange(desc(followers_count))

# Split the tweets on that single account list. Deriving both halves from
# news_orgs guarantees that every tweet lands in exactly one of them; filtering
# the tweet rows separately could duplicate or drop tweets whenever an
# account's description or verified flag differs between rows.
news <- semi_join(x = cf, y = news_orgs, by = "screen_name")
news$user_type <- "news"

public <- anti_join(x = cf, y = news_orgs, by = "screen_name")
public$user_type <- "public"

# Recombine: cf now carries a user_type column ("public" rows first).
cf <- rbind(public, news)


# The 20 news accounts with the most tweets, most active first.
top.20.users <- news %>%
  count(screen_name, sort = TRUE) %>%
  head(20)

# Horizontal bar chart of tweet counts for those accounts.
ggplot(top.20.users, aes(x = reorder(screen_name, -n), y = n)) +
  geom_col(fill = "darkslategray") +
  theme_minimal() +
  coord_flip() +
  labs(x = "Users", y = "Count")

Applying Sentiments

We are interested in how sentiment differs between accounts classified as news outlets and the general public. In the full project we also plan to look more closely at individual public officials, such as the Sheriff.

# Score each tweet word with the AFINN lexicon. The join key is spelled out so
# the join does not depend on whichever column names happen to be shared.
# assumes tweet_words_nostop (built elsewhere) has a `word` column — TODO confirm
ts1 <- tweet_words_nostop %>%
  inner_join(get_sentiments("afinn"), by = "word")

# Net sentiment per tweet: the sum of the AFINN values of its matched words.
ts2 <- ts1 %>%
  group_by(status_id) %>%
  summarize(sentiment = sum(value))

# Attach the scores to the source tweets; tweets with no row in ts2 get NA.
cf2 <- Sources %>%
  left_join(ts2, by = "status_id")

# NOTE(review): the accompanying text talks about news outlets vs. the public,
# but this plot compares the accounts in `Sources` — confirm which is intended.
ggplot(cf2, aes(x = sentiment, col = screen_name)) +
  geom_density(lwd = 2) +
  theme_minimal()